import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
This project builds a neural network that recognises butterfly species from images. I used a convolutional neural network (CNN) to keep the number of model inputs manageable.
"""At the moment I am considering 5 species"""
# Preview one training image per species in a single row of subplots.
# OpenCV loads images as BGR, so each is converted to RGB before display.
species_samples = [
    ('Dataset/Train/Aglais_io_1.jpg', 'Aglais io'),
    ('Dataset/Train/Argynnis_paphia_1.jpg', 'Argynnis paphia'),
    ('Dataset/Train/Nymphalis_antiopa_1.jpg', 'Nymphalis antiopa'),
    ('Dataset/Train/Papilio_machaon_1.jpg', 'Papilio machaon'),
    ('Dataset/Train/Vanessa_atalanta_1.jpg', 'Vanessa atalanta'),
]
fig = plt.figure(figsize=(24, 24))
for position, (file_path, species_name) in enumerate(species_samples, start=1):
    image = cv2.imread(file_path, cv2.IMREAD_COLOR)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    ax = fig.add_subplot(1, 5, position)
    ax.imshow(image)
    ax.set_title(species_name, fontsize=15, color='Red', fontweight='bold')
To make labelling easier, the image file names match the butterfly species they depict, e.g. Aglais_io_1.jpg.
#from keras.preprocessing.image import ImageDataGenerator
# NOTE(review): the triple-quoted string below is a free-standing literal (a
# notebook cell used as documentation); it has no runtime effect. It describes
# the ImageDataGenerator parameters used for augmentation.
"""rotation_range : rotation by +/- 45 degrees,
width_shift_range : pixel shift by width by distance -/+ 0.2,
height_shift_range : pixel shift by height by distance -/+ 0.2,
shear_range : Shear angle in counter-clockwise direction in degrees,
zoom_range : Range for random zoom [1-zoom_range, 1+zoom_range],
horizontal_flip : Randomly flip inputs horizontally,
fill_mode : Fill dark pixels after the image has been processed,
brightness_range : brightens or darkens the generated image. """
# NOTE(review): the generator below is commented out because augmentation has
# already been run; images_generation() still references this `datagen` name.
#datagen = ImageDataGenerator(
#    rotation_range=70,
#    width_shift_range=0.2,
#    height_shift_range=0.2,
#    shear_range=0.2,
#    zoom_range=0.2,
#    brightness_range=[0.3,1.2],
#    horizontal_flip=True,
#    fill_mode='reflect',
#)
# (notebook output echo of the documentation string above)
'rotation_range : rotation by +/- 45 degrees,\nwidth_shift_range : pixel shift by width by distance -/+ 0.2,\nheight_shift_range : pixel shift by height by distance -/+ 0.2,\nshear_range : Shear angle in counter-clockwise direction in degrees,\nzoom_range : Range for random zoom [1-zoom_range, 1+zoom_range],\nhorizontal_flip : Randomly flip inputs horizontally,\nfill_mode : Fill dark pixels after the image has been processed, \nbrightness_range : brightens or darkens the generated image. '
def images_generation(input_path, output_path, batch_size, num_of_processes_per_image):
    """Augment every image in ``input_path`` and save the results to ``output_path``.

    For each source image, ``num_of_processes_per_image`` augmented variants are
    written, using the module-level ``datagen`` (a Keras ImageDataGenerator —
    currently commented out above, so it must be defined before calling this).

    Args:
        input_path: folder of source .jpg images.
        output_path: folder that receives the generated images.
        batch_size: batch size handed to ``datagen.flow``.
        num_of_processes_per_image: number of augmented images per source image.
    """
    try:
        for file_name in os.listdir(input_path):
            prefix = file_name.replace('.jpg', '')
            image = cv2.imread(f'{input_path}/{file_name}', cv2.IMREAD_COLOR)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            # flow() expects a batch axis: (1, height, width, channels).
            image = image.reshape((1,) + image.shape)
            generated = 0
            # flow() yields forever, so break out explicitly. The original
            # counter started at 1, producing one image fewer than requested.
            for _batch in datagen.flow(image, batch_size=batch_size,
                                       save_to_dir=output_path,
                                       save_prefix=f'{prefix}', save_format='jpg'):
                generated += 1
                if generated == num_of_processes_per_image:
                    break
    except FileNotFoundError:
        print("No such file or directory. Check the source path or destination folder ")
#images_generation('Dataset/Train','Dataset/Train_samples', 10, 52)
I generated additional images with ImageDataGenerator (from keras.preprocessing.image) to increase the number of training images and reduce the risk of model overfitting. I know this is not the best possible approach, but I had only a small number of butterfly images. The images I downloaded from Google were unlicensed. The augmented images have, of course, already been generated.
Converting the images to matrices and mapping them to categories.
import re
import random
def make_set(path):
    """Load every image in ``path`` and build a shuffled, labelled dataset.

    Each image is read with OpenCV, converted BGR→RGB, resized to 110x110, and
    normalised to [0, 1]. The one-hot label is derived from the file-name
    prefix (e.g. Aglais_io_1.jpg → Aglais io).

    Args:
        path: folder containing the image files.

    Returns:
        (X, y): float32 array of image matrices and int32 array of one-hot
        labels, or None (implicitly) if ``path`` does not exist.
    """
    # File-name prefix → one-hot label. Anything unrecognised falls through to
    # Vanessa atalanta, matching the original labelling scheme.
    label_by_prefix = [
        ('Aglais_io', [1, 0, 0, 0, 0]),
        ('Argynnis_paphia', [0, 1, 0, 0, 0]),
        ('Nymphalis_antiopa', [0, 0, 1, 0, 0]),
        ('Papilio_machaon', [0, 0, 0, 1, 0]),
    ]
    try:
        dataset = []
        for file_name in os.listdir(path):
            image = cv2.imread(f'{path}/{file_name}', cv2.IMREAD_COLOR)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, (110, 110))
            image = image / 255.0  # normalise pixel values to [0, 1]
            for prefix, candidate in label_by_prefix:
                if file_name.startswith(prefix):
                    label = candidate
                    break
            else:
                label = [0, 0, 0, 0, 1]  # Vanessa atalanta
            dataset.append([image, label])
        # Shuffle so training batches are not ordered by species.
        random.shuffle(dataset)
        X = [image for image, _ in dataset]
        y = [label for _, label in dataset]
        return np.array(X, dtype='float32'), np.array(y, dtype='int32')
    except FileNotFoundError:
        print("No such file or directory. Check the source path or destination folder ")
#X_train, y_train = make_set('Dataset/Train_samples')
#X_valid, y_valid = make_set('Dataset/Validation')
#X_test, y_test = make_set('Dataset/Test')
import pickle
def save_ready_dataset(X, y, path, X_name, y_name):
    """Serialize X and y to ``{path}/{X_name}.pickle`` and ``{path}/{y_name}.pickle``.

    Prints an error message instead of raising when ``path`` does not exist.

    Args:
        X: images array (or any picklable object).
        y: labels array (or any picklable object).
        path: destination folder (must already exist).
        X_name: base file name for X (without extension).
        y_name: base file name for y (without extension).
    """
    try:
        # Context managers guarantee the files are closed even if dump() fails
        # (the original left a handle open on error).
        with open(f"{path}/{X_name}.pickle", "wb") as x_file:
            pickle.dump(X, x_file)
        with open(f"{path}/{y_name}.pickle", "wb") as y_file:
            pickle.dump(y, y_file)
    except FileNotFoundError:
        print("No such file or directory. Check the source path or destination folder ")
#save_ready_dataset(X_train, y_train, path = 'Ready_Datasets\Train', X_name = 'X_train', y_name ='y_train')
#save_ready_dataset(X_valid, y_valid, path = 'Ready_Datasets\Validation', X_name = 'X_valid', y_name ='y_valid')
#save_ready_dataset(X_test, y_test, path = 'Ready_Datasets\Test', X_name = 'X_test', y_name ='y_test')
I have saved (serialized) the datasets using pickle.
# You can load the datasets if you have saved it using save_ready_dataset function
import pickle


def _load_pickle(file_path):
    """Deserialize one pickle file (trusted, locally produced data only)."""
    with open(file_path, "rb") as pickle_file:
        return pickle.load(pickle_file)


# Restore the datasets written by save_ready_dataset. os.path.join replaces the
# original backslash string literals, which were Windows-only and contained
# invalid escape sequences (a SyntaxWarning on Python 3.12+).
# Train
X_train = _load_pickle(os.path.join('Ready_Datasets', 'Train', 'X_train.pickle'))
y_train = _load_pickle(os.path.join('Ready_Datasets', 'Train', 'y_train.pickle'))
# Validation
X_valid = _load_pickle(os.path.join('Ready_Datasets', 'Validation', 'X_valid.pickle'))
y_valid = _load_pickle(os.path.join('Ready_Datasets', 'Validation', 'y_valid.pickle'))
# Test
X_test = _load_pickle(os.path.join('Ready_Datasets', 'Test', 'X_test.pickle'))
y_test = _load_pickle(os.path.join('Ready_Datasets', 'Test', 'y_test.pickle'))
def count_label_dist(y):
    """Tally how many samples of each butterfly species appear in a label set.

    Args:
        y: iterable of 5-element one-hot label vectors.

    Returns:
        dict mapping species name to its sample count. Any vector that does
        not match the first four species' patterns counts as Vanessa atalanta.
    """
    species_patterns = [
        ('Aglais io', [1, 0, 0, 0, 0]),
        ('Argynnis paphia', [0, 1, 0, 0, 0]),
        ('Nymphalis antiopa', [0, 0, 1, 0, 0]),
        ('Papilio machaon', [0, 0, 0, 1, 0]),
    ]
    labels_dist_dict = {name: 0 for name, _ in species_patterns}
    labels_dist_dict['Vanessa atalanta'] = 0
    for label in y:
        for name, pattern in species_patterns:
            if np.array_equal(pattern, label, equal_nan=False):
                labels_dist_dict[name] += 1
                break
        else:
            labels_dist_dict['Vanessa atalanta'] += 1
    return labels_dist_dict
import numpy as np
import pandas as pd

# Species counts for every dataset split, derived from the one-hot labels.
train_distibution = count_label_dist(y_train)
valid_distibution = count_label_dist(y_valid)
test_distibution = count_label_dist(y_test)

# Collect the three splits into one table, rows indexed by species name.
Dataset_Distribution = pd.DataFrame(
    {
        'Train distribution': train_distibution,
        'Validation distribution': valid_distibution,
        'Test distribution': test_distibution,
    },
    index=["Aglais io", "Argynnis paphia", "Nymphalis antiopa", "Papilio machaon", "Vanessa atalanta"],
)
# One pie chart per dataset split, sharing the colour scheme and styling.
colors = ['gold', 'orangered', 'forestgreen', 'turquoise', 'darkmagenta']
species_names = ["Aglais io", "Argynnis paphia", "Nymphalis antiopa", "Papilio machaon", "Vanessa atalanta"]
fig = plt.figure(figsize=(40, 60))
for subplot_index, (column, split_name) in enumerate(
        [('Train distribution', 'train'),
         ('Validation distribution', 'validation'),
         ('Test distribution', 'test')], start=1):
    counts = Dataset_Distribution[column]
    # .iloc avoids deprecated integer-position lookup on a string-labelled Series.
    legend_labels = [f"{name}: {counts.iloc[i]}" for i, name in enumerate(species_names)]
    ax = fig.add_subplot(1, 3, subplot_index)
    ax.pie(counts, colors=colors, autopct='%.0f%%', textprops={'fontsize': 16})
    ax.set_title(f'Distribution of butterfly species in the {split_name} set',
                 fontsize=20, color='indigo', fontweight='bold')
    ax.legend(legend_labels, loc='lower left', fontsize=20)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
#from keras.callbacks import CSVLogger
#csv_logger = CSVLogger('Model Stats.log', separator=',', append=False)
def model_training(callbacks=None):
    """Build, compile and train the butterfly CNN on the module-level datasets.

    Architecture: five Conv2D(128, 3x3) + ReLU + 2x2 max-pool stages, then
    dropout, a 128-unit softplus dense layer, and a 5-way softmax output that
    matches the one-hot labels (e.g. [0, 1, 0, 0, 0]).

    Args:
        callbacks: optional list of Keras callbacks (e.g. a CSVLogger).
            Defaults to none — the original body referenced ``csv_logger``,
            which is commented out above and would raise NameError.

    Returns:
        The trained model. (The original returned the History object, but the
        cells below need the model itself for saving; the fit history remains
        available via the model's ``history`` attribute.)
    """
    model = Sequential()
    model.add(Conv2D(128, (3, 3), input_shape=X_train.shape[1:]))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Four more identical conv/pool stages progressively shrink 110x110 to 1x1.
    for _ in range(4):
        model.add(Conv2D(128, (3, 3)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(128))
    model.add(Activation('softplus'))
    model.add(Dropout(0.5))
    model.add(Dense(5, activation=tf.nn.softmax))
    model.compile(loss='categorical_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
                  metrics=['accuracy', 'AUC'])
    model.fit(X_train, y_train, batch_size=32, epochs=30,
              validation_data=(X_valid, y_valid),
              callbacks=callbacks or [])
    return model
"""Model is already saved"""
#model_train()
'Model is already saved'
"""saving the model as the name suggests"""
#model.save('CNN_Model1')
'saving the model as the name suggests'
I selected the model architecture by trial and error, evaluating many models with different numbers of convolutional layers, different numbers of filters, and so on.
# The import was previously commented out, leaving `keras` undefined here.
from tensorflow import keras

cnn = keras.models.load_model('CNN_Model1')  # restore the trained CNN from disk
cnn.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 108, 108, 128) 3584
activation (Activation) (None, 108, 108, 128) 0
max_pooling2d (MaxPooling2D (None, 54, 54, 128) 0
)
conv2d_1 (Conv2D) (None, 52, 52, 128) 147584
activation_1 (Activation) (None, 52, 52, 128) 0
max_pooling2d_1 (MaxPooling (None, 26, 26, 128) 0
2D)
conv2d_2 (Conv2D) (None, 24, 24, 128) 147584
activation_2 (Activation) (None, 24, 24, 128) 0
max_pooling2d_2 (MaxPooling (None, 12, 12, 128) 0
2D)
conv2d_3 (Conv2D) (None, 10, 10, 128) 147584
activation_3 (Activation) (None, 10, 10, 128) 0
max_pooling2d_3 (MaxPooling (None, 5, 5, 128) 0
2D)
conv2d_4 (Conv2D) (None, 3, 3, 128) 147584
activation_4 (Activation) (None, 3, 3, 128) 0
max_pooling2d_4 (MaxPooling (None, 1, 1, 128) 0
2D)
dropout (Dropout) (None, 1, 1, 128) 0
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
activation_5 (Activation) (None, 128) 0
dropout_1 (Dropout) (None, 128) 0
dense_1 (Dense) (None, 5) 645
=================================================================
Total params: 611,077
Trainable params: 611,077
Non-trainable params: 0
_________________________________________________________________
#from tensorflow import keras
#cnn = keras.models.load_model('CNN_Model1')
def conf_matrix(y_predict, y_actual):
    """Build a 5x5 confusion matrix from softmax outputs and one-hot targets.

    Rows index the predicted species, columns the actual species — the same
    orientation draw_conf_matrix labels "Predict ↓" / "Actual →".

    Args:
        y_predict: array of per-sample class-probability vectors.
        y_actual: array of per-sample one-hot ground-truth vectors.

    Returns:
        5x5 int numpy array of counts.
    """
    conf_matrix = np.zeros((5, 5), dtype='int')
    for prediction, actual in zip(y_predict, y_actual):
        # argmax replaces the original max-equality scan and 25-branch if/elif
        # chain; on a tied maximum it picks the first class instead of building
        # a multi-hot vector that matched nothing and silently dropped the sample.
        predicted_class = int(np.argmax(prediction))
        actual_class = int(np.argmax(actual))
        conf_matrix[predicted_class, actual_class] += 1
    return conf_matrix
import numpy as np
import pandas as pd
def color_matrix(df):
    """Return a same-shaped DataFrame of CSS strings for Styler.apply.

    The main diagonal (correct predictions) is green; every other cell
    (misclassifications) is tomato.
    """
    css = np.where(np.eye(*df.shape, dtype=bool),
                   'background-color: Green',
                   'background-color: Tomato')
    return pd.DataFrame(css, index=df.index, columns=df.columns)
def draw_conf_matrix(conf_matrix, name):
    """Render a confusion matrix as a styled pandas table.

    Args:
        conf_matrix: 5x5 array — rows are predicted species, columns actual
            (see conf_matrix()).
        name: dataset name inserted into the table caption (e.g. "Training").

    Returns:
        A pandas Styler: green diagonal / tomato off-diagonal (via
        color_matrix), with enlarged centred headers and bold cell values.
    """
    Conf_Matrix = pd.DataFrame(conf_matrix,
        index=pd.Index(["Aglais io", "Argynnis paphia" , "Nymphalis antiopa", "Papilio machaon", "Vanessa atalanta"], name='Predict ↓'),
        columns=pd.MultiIndex.from_product([[f'Confusion Matrix for {name} dataset'],["Aglais io", "Argynnis paphia" , "Nymphalis antiopa", "Papilio machaon", "Vanessa atalanta"]], names=['', 'Actual →']))
    # axis=None hands the whole DataFrame to color_matrix at once.
    return Conf_Matrix.style.apply(color_matrix, axis=None).set_table_styles([
        {'selector': 'th.col_heading.level0', 'props': 'font-size: 2em;'},
        {'selector': 'th','props': 'font-size: 1.25em; background-color: #428df5; color: white; text-align: center;'},
        {'selector': 'th.col_heading', 'props': 'font-size: 1.25em; text-align: center;'},
        {'selector': 'th.row_heading', 'props': 'font-size: 1.25em; text-align: center;'},
        {'selector': 'td', 'props': 'text-align: center; font-weight: bold;font-size: 1.5em;'},])
def Evaluation(conf_matrix, setname):
""" Methoda determines the model evaluation metrics such as Recall/Sensitivity, Precision, Specifity, F-Score, Accuracy."""
#Recall/Sensitivity
Aglais_io_sensitivity = conf_matrix[0,0] /(conf_matrix[0,0] + np.sum(np.delete(conf_matrix[:,0], 0)))
Argynnis_paphi_sensitivity = conf_matrix[1,1] /(conf_matrix[1,1] + np.sum(np.delete(conf_matrix[:,1], 1)))
Nymphalis_antiopa_sensitivity = conf_matrix[2,2] /(conf_matrix[2,2] + np.sum(np.delete(conf_matrix[:,2], 2)))
Papilio_machaon_sensitivity = conf_matrix[3,3] /(conf_matrix[3,3] + np.sum(np.delete(conf_matrix[:,3], 3)))
Vanessa_atalanta_sensitivity = conf_matrix[4,4] /(conf_matrix[4,4] + np.sum(np.delete(conf_matrix[:,4], 4)))
Avg_sensitivity = (Aglais_io_sensitivity+Argynnis_paphi_sensitivity+Nymphalis_antiopa_sensitivity+Papilio_machaon_sensitivity+Vanessa_atalanta_sensitivity)/5
#Precision
Aglais_io_precision = conf_matrix[0,0] /(conf_matrix[0,0] + np.sum(np.delete(conf_matrix[0,:], 0)))
Argynnis_paphi_precision = conf_matrix[1,1] /(conf_matrix[1,1] + np.sum(np.delete(conf_matrix[1,:], 1)))
Nymphalis_antiopa_precision = conf_matrix[2,2] /(conf_matrix[2,2] + np.sum(np.delete(conf_matrix[2,:], 2)))
Papilio_machaon_precision = conf_matrix[3,3] /(conf_matrix[3,3] + np.sum(np.delete(conf_matrix[3,:], 3)))
Vanessa_atalanta_precision = conf_matrix[4,4] /(conf_matrix[4,4] + np.sum(np.delete(conf_matrix[4,:], 4)))
Avg_precision = (Aglais_io_precision+Argynnis_paphi_precision+Nymphalis_antiopa_precision+Papilio_machaon_precision+Vanessa_atalanta_precision)/5
#Specifity
Aglais_io_specifity = np.sum(np.delete(np.delete(conf_matrix, 0, 0),0,1))/(np.sum(np.delete(np.delete(conf_matrix, 0, 0),0,1)) + np.sum(np.delete(conf_matrix[0,:], 0)))
Argynnis_paphi_specifity = np.sum(np.delete(np.delete(conf_matrix, 1, 0),1,1))/(np.sum(np.delete(np.delete(conf_matrix, 1, 0),1,1)) + np.sum(np.delete(conf_matrix[1,:], 1)))
Nymphalis_antiopa_specifity = np.sum(np.delete(np.delete(conf_matrix, 2, 0),2,1))/(np.sum(np.delete(np.delete(conf_matrix, 2, 0),2,1)) + np.sum(np.delete(conf_matrix[2,:], 2)))
Papilio_machaon_specifity = np.sum(np.delete(np.delete(conf_matrix, 3, 0),3,1))/(np.sum(np.delete(np.delete(conf_matrix, 3, 0),3,1)) + np.sum(np.delete(conf_matrix[3,:], 3)))
Vanessa_atalanta_specifity = np.sum(np.delete(np.delete(conf_matrix, 4, 0),4,1))/(np.sum(np.delete(np.delete(conf_matrix, 4, 0),4,1)) + np.sum(np.delete(conf_matrix[4,:], 4)))
Avg_specifity = (Aglais_io_specifity+Argynnis_paphi_specifity+Nymphalis_antiopa_specifity+Papilio_machaon_specifity+Vanessa_atalanta_specifity)/5
#F1_score
Aglais_io_f1_score = (2*Aglais_io_precision * Aglais_io_sensitivity)/(Aglais_io_precision + Aglais_io_sensitivity)
Argynnis_paphi_f1_score = (2*Argynnis_paphi_precision * Argynnis_paphi_sensitivity)/(Argynnis_paphi_precision + Argynnis_paphi_sensitivity)
Nymphalis_antiopa_f1_score = (2*Nymphalis_antiopa_precision * Nymphalis_antiopa_sensitivity)/(Nymphalis_antiopa_precision + Nymphalis_antiopa_sensitivity)
Papilio_machaon_f1_score = (2*Papilio_machaon_precision * Papilio_machaon_sensitivity)/(Papilio_machaon_precision + Papilio_machaon_sensitivity)
Vanessa_atalanta_f1_score = (2*Vanessa_atalanta_precision * Vanessa_atalanta_sensitivity)/(Vanessa_atalanta_precision + Vanessa_atalanta_sensitivity)
Avg_f1_score = (Aglais_io_f1_score+Argynnis_paphi_f1_score+Nymphalis_antiopa_f1_score+Papilio_machaon_f1_score+Vanessa_atalanta_f1_score)/5
#Accuracy
Accuracy = (conf_matrix[0,0] + conf_matrix[1,1] + conf_matrix[2,2] +conf_matrix[3,3]+conf_matrix[4,4])/ np.sum(conf_matrix)
Scores_table = [[Aglais_io_sensitivity, Aglais_io_precision, Aglais_io_specifity, Aglais_io_f1_score],
[Argynnis_paphi_sensitivity, Argynnis_paphi_precision, Argynnis_paphi_specifity, Argynnis_paphi_f1_score],
[Nymphalis_antiopa_sensitivity, Nymphalis_antiopa_precision, Nymphalis_antiopa_specifity,Nymphalis_antiopa_f1_score],
[Papilio_machaon_sensitivity, Papilio_machaon_precision, Papilio_machaon_specifity, Papilio_machaon_f1_score],
[Vanessa_atalanta_sensitivity, Vanessa_atalanta_precision, Vanessa_atalanta_specifity, Vanessa_atalanta_f1_score],
[Avg_sensitivity, Avg_precision, Avg_specifity, Avg_f1_score],
['','','',Accuracy]
]
Evaluation_scores = pd.DataFrame(Scores_table
, index = ["Aglais io", "Argynnis paphia" , "Nymphalis antiopa", "Papilio machaon", "Vanessa atalanta","Avarage value", "Accuracy"],
columns=pd.MultiIndex.from_product([[f'Evaluation metrics for {setname} dataset '],["Recall/Sensivity", "Precision", "Specifity", "F1-Score"]])
)
return Evaluation_scores.style.set_table_styles([
{'selector': 'th.col_heading.level0', 'props': 'font-size: 2.0em; text-align: centre; background-color: #ac1aeb'},
{'selector': 'th.col_heading', 'props': 'font-size: 1.5em; text-align: center; background-color: #ac1aeb'},
{'selector': 'th.row_heading', 'props': 'font-size: 1.5em; text-align: center; background-color: #ac1aeb'},
{'selector': 'th','props': 'background-color: #ac1aeb;'},
{'selector': 'td', 'props': 'font-size: 1.5em; text-align: center; background-color: #b0eb1a; color: green;'},
])
For evaluation I use a confusion matrix, from which I compute Precision, Recall/Sensitivity, Specificity and F1-score for every butterfly category, the average value of each metric across all categories, and finally the overall accuracy.
Confusion_Matrix_train = conf_matrix(cnn.predict(X_train),y_train)
697/697 [==============================] - 100s 134ms/step
draw_conf_matrix(Confusion_Matrix_train, "Training")
| Confusion Matrix for Training dataset | |||||
|---|---|---|---|---|---|
| Actual → | Aglais io | Argynnis paphia | Nymphalis antiopa | Papilio machaon | Vanessa atalanta |
| Predict ↓ | |||||
| Aglais io | 5236 | 49 | 156 | 117 | 94 |
| Argynnis paphia | 23 | 4199 | 17 | 84 | 35 |
| Nymphalis antiopa | 265 | 7 | 3793 | 15 | 5 |
| Papilio machaon | 60 | 142 | 101 | 3834 | 73 |
| Vanessa atalanta | 68 | 33 | 1 | 17 | 3866 |
Evaluation(conf_matrix = Confusion_Matrix_train, setname = "Training")
| Evaluation metrics for Training dataset | ||||
|---|---|---|---|---|
| Recall/Sensivity | Precision | Specifity | F1-Score | |
| Aglais io | 0.926398 | 0.926398 | 0.974997 | 0.926398 |
| Argynnis paphia | 0.947856 | 0.963515 | 0.991097 | 0.955621 |
| Nymphalis antiopa | 0.932399 | 0.928519 | 0.983975 | 0.930455 |
| Papilio machaon | 0.942710 | 0.910689 | 0.979367 | 0.926423 |
| Vanessa atalanta | 0.949178 | 0.970138 | 0.993468 | 0.959543 |
| Avarage value | 0.939708 | 0.939852 | 0.984581 | 0.939688 |
| Accuracy | 0.938896 | |||
Confusion_Matrix_validation = conf_matrix(cnn.predict(X_valid), y_valid)
33/33 [==============================] - 5s 132ms/step
draw_conf_matrix(Confusion_Matrix_validation, "Validation")
| Confusion Matrix for Validation dataset | |||||
|---|---|---|---|---|---|
| Actual → | Aglais io | Argynnis paphia | Nymphalis antiopa | Papilio machaon | Vanessa atalanta |
| Predict ↓ | |||||
| Aglais io | 209 | 0 | 1 | 16 | 5 |
| Argynnis paphia | 0 | 210 | 6 | 3 | 0 |
| Nymphalis antiopa | 1 | 0 | 197 | 3 | 0 |
| Papilio machaon | 0 | 0 | 6 | 183 | 0 |
| Vanessa atalanta | 0 | 0 | 0 | 4 | 205 |
Evaluation(conf_matrix = Confusion_Matrix_validation, setname = "Validation")
| Evaluation metrics for Validation dataset | ||||
|---|---|---|---|---|
| Recall/Sensivity | Precision | Specifity | F1-Score | |
| Aglais io | 0.995238 | 0.904762 | 0.973778 | 0.947846 |
| Argynnis paphia | 1.000000 | 0.958904 | 0.989273 | 0.979021 |
| Nymphalis antiopa | 0.938095 | 0.980100 | 0.995232 | 0.958637 |
| Papilio machaon | 0.875598 | 0.968254 | 0.992857 | 0.919598 |
| Vanessa atalanta | 0.976190 | 0.980861 | 0.995232 | 0.978520 |
| Avarage value | 0.957024 | 0.958576 | 0.989275 | 0.956725 |
| Accuracy | 0.957102 | |||
Confusion_Matrix_test = conf_matrix(cnn.predict(X_test), y_test)
2/2 [==============================] - 0s 70ms/step
draw_conf_matrix(Confusion_Matrix_test, "Testing")
| Confusion Matrix for Testing dataset | |||||
|---|---|---|---|---|---|
| Actual → | Aglais io | Argynnis paphia | Nymphalis antiopa | Papilio machaon | Vanessa atalanta |
| Predict ↓ | |||||
| Aglais io | 9 | 0 | 0 | 0 | 0 |
| Argynnis paphia | 0 | 10 | 0 | 0 | 0 |
| Nymphalis antiopa | 1 | 0 | 10 | 0 | 0 |
| Papilio machaon | 0 | 0 | 0 | 10 | 0 |
| Vanessa atalanta | 0 | 0 | 0 | 0 | 10 |
Evaluation(conf_matrix = Confusion_Matrix_test, setname = "Testing")
| Evaluation metrics for Testing dataset | ||||
|---|---|---|---|---|
| Recall/Sensivity | Precision | Specifity | F1-Score | |
| Aglais io | 0.900000 | 1.000000 | 1.000000 | 0.947368 |
| Argynnis paphia | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| Nymphalis antiopa | 1.000000 | 0.909091 | 0.975000 | 0.952381 |
| Papilio machaon | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| Vanessa atalanta | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| Avarage value | 0.980000 | 0.981818 | 0.995000 | 0.979950 |
| Accuracy | 0.980000 | |||
def multi_images_prediction(folder_path, model):
    """Display each butterfly image in ``folder_path`` beside the model's
    predicted class-probability bar chart.

    The actual species is parsed from the file name (e.g. Aglais_io_1.jpg →
    "Aglais io"); the prediction title is green when it matches the actual
    species and red otherwise.

    Args:
        folder_path: folder of .jpg butterfly images named Genus_species_N.jpg.
        model: a trained Keras model accepting (1, 110, 110, 3) input.
    """
    mylabels = ["Aglais io", "Argynnis paphia" , "Nymphalis antiopa", "Papilio machaon", "Vanessa atalanta"]
    for file_name in os.listdir(folder_path):
        image = cv2.imread(f'{folder_path}/{file_name}', cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        original_image = image  # keep the full-size RGB image for display
        image = cv2.resize(image, (110, 110))
        image = image / 255.0  # same normalisation as training
        # Model expects a batch axis: (1, 110, 110, 3).
        array_prediction = np.round(model.predict(image.reshape((1,) + image.shape), verbose=0), 2)
        actual_label = f"{file_name.split('_')[0]} {file_name.split('_')[1]}"
        fig = plt.figure(figsize=(20, 10))
        ax1 = fig.add_subplot(1, 2, 1)
        ax1.imshow(original_image)
        ax1.set_title(f"Actual spieces name: {actual_label}", fontsize=15, color="green")
        ax2 = fig.add_subplot(1, 2, 2)
        ax2.bar(mylabels, array_prediction[0] * 100)
        # Annotate each bar with its percentage. The original reused `i` here,
        # clobbering the file-name loop variable; renamed to avoid the clash.
        for bar_index in range(len(mylabels)):
            plt.text(bar_index - 0.1, array_prediction[0][bar_index] * 100 + 1,
                     f"{round(array_prediction[0][bar_index] * 100, 2)}%")
        ax2.set_ylabel('species probability distribution', fontsize=15)
        predicted_label = mylabels[np.argmax(array_prediction)]
        title_color = "green" if predicted_label == actual_label else "red"
        ax2.set_title(f"Predicted spieces name base on argmax: {predicted_label} ", fontsize=15, color=title_color)
# Run the prediction demo on personally collected images, guarding against a
# missing model or a missing folder so the notebook keeps running either way.
try:
    multi_images_prediction("Dataset/Private_images", cnn)
except NameError:
    # Raised when `cnn` (the model loaded earlier) was never defined.
    print("Check the model name or whether such a model exists.")
except FileNotFoundError:
    print("No such file or directory. Check the source path or destination folder ")
C:\Users\Cezary\AppData\Local\Temp\ipykernel_14176\2585413579.py:12: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`). Consider using `matplotlib.pyplot.close()`. fig = plt.figure(figsize=(20,10));